datadir <- '~/UROP'
source(file.path(datadir, 'R/unsupervised.R'))
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Attaching SeuratObject
library(ggpubr)
## Loading required package: ggplot2
We now demonstrate a semi-supervised approach using a cancer-containing spatial dataset that has initial cell type labels. The initial cancer-cell labels were determined using known marker genes, while the other labels were generated with RCTD.
First, we add the cancer cell labels to the raw RCTD object and augment the original SpatialRNA object with the full list of genes.
myRCTD <- readRDS(file.path(datadir, '/Data/myRCTD_201014_03.rds'))
results <- read.csv(file = file.path(datadir, '/Data/results_processed_slideseq_data_2020-12-12_Puck_201014_03_2020-12-12_Puck_201014_03_cell_types.csv'))
cancer_results <- results[results$HRS_location == TRUE,]
results <- myRCTD@results$results_df
results$first_type = factor(results$first_type, levels=append(levels(results$first_type), 'Cancer_cells'))
results$second_type = factor(results$second_type, levels=append(levels(results$second_type), 'Cancer_cells'))
results[cancer_results$barcodes, 'spot_class'] <- 'singlet'
results[cancer_results$barcodes,'first_type'] <- 'Cancer_cells'
# below modifies cancer cell weight
for (barcode in cancer_results$barcodes) {
if (barcode %in% rownames(myRCTD@results$weights_doublet)) {
myRCTD@results$weights_doublet[barcode,'first_type'] <- 1
}
}
myRCTD@results$results_df <- results
myRCTD@cell_type_info$info[[2]] <- append(myRCTD@cell_type_info$info[[2]], 'Cancer_cells')
myRCTD@cell_type_info$renorm[[2]] <- append(myRCTD@cell_type_info$renorm[[2]], 'Cancer_cells')
myRCTD@cell_type_info$info[[3]] <- myRCTD@cell_type_info$info[[3]] + 1
myRCTD@cell_type_info$renorm[[3]] <- myRCTD@cell_type_info$renorm[[3]] + 1
class_df <- myRCTD@internal_vars$class_df
class_df['Cancer_cells', 'class'] = 'Cancer_cells'
myRCTD@internal_vars$class_df <- class_df
myRCTD@config$max_cores <- 4
counts <- read.csv(file = file.path(datadir, '/Data/MappedDGEForR.csv'))
rownames(counts) <- counts$GENE; counts$GENE <- NULL
nUMI <- colSums(counts)
coords <- myRCTD@originalSpatialRNA@coords
puck <- SpatialRNA(coords, counts, nUMI)
myRCTD@originalSpatialRNA <- puck
saveRDS(puck, file.path(datadir, '/Data/cancer_puck.rds'))
saveRDS(myRCTD, file.path(datadir, '/Objects/vignette_v1/cancer_RCTD_initial.rds'))
We now run the unsupervised learning algorithm with the updated gene list and the new labels, using CSIDE on the new labels to generate our initialization.
myRCTD <- readRDS(file.path(datadir, 'Objects/vignette_v1/cancer_RCTD_initial.rds'))
RCTD_list <- run.de.initialization(myRCTD, convergence_thresh = 0.995)
saveRDS(RCTD_list, file.path(datadir, '/Objects/vignette_v1/cancer_RCTD_final.rds'))
We can plot the cell types spatially before and after the learning algorithm to compare the assignments.
RCTD_init <- readRDS(file.path(datadir, 'Objects/vignette_v1/cancer_RCTD_initial.rds'))
RCTD_list <- readRDS(file.path(datadir, 'Objects/vignette_v1/cancer_RCTD_final.rds'))
RCTD_pred <- RCTD_list[[length(RCTD_list)]]
plot_all_cell_types(RCTD_init@results$results_df, RCTD_init@originalSpatialRNA@coords, RCTD_init@cell_type_info$renorm[[2]], '..')
plot_all_cell_types(RCTD_pred@results$results_df, RCTD_pred@originalSpatialRNA@coords, RCTD_pred@cell_type_info$renorm[[2]], '..')
We can also find marker genes for the new cancer cell type.
get_marker_data <- function(cell_type_names, cell_type_means, gene_list) {
marker_means = cell_type_means[gene_list,]
marker_norm = marker_means / rowSums(marker_means)
marker_data = as.data.frame(cell_type_names[max.col(marker_means)])
marker_data$max_epr <- apply(cell_type_means[gene_list,],1,max)
colnames(marker_data) = c("cell_type",'max_epr')
rownames(marker_data) = gene_list
marker_data$log_fc <- 0
epsilon <- 1e-9
for(cell_type in unique(marker_data$cell_type)) {
cur_genes <- gene_list[marker_data$cell_type == cell_type]
other_mean = rowMeans(cell_type_means[cur_genes,cell_type_names != cell_type])
marker_data$log_fc[marker_data$cell_type == cell_type] <- log(epsilon + cell_type_means[cur_genes,cell_type]) - log(epsilon + other_mean)
}
return(marker_data)
}
cur_cell_types <- c('CD4_T_cells', 'CD8_T_cells', 'Monocytes_macrophages', 'Cancer_cells')
puck <- RCTD_pred@spatialRNA
cell_type_info_restr = RCTD_pred@cell_type_info$info
cell_type_info_restr[[1]] = cell_type_info_restr[[1]][,cur_cell_types]
cell_type_info_restr[[2]] = cur_cell_types; cell_type_info_restr[[3]] = length(cur_cell_types)
de_genes <- get_de_genes(cell_type_info_restr, puck, fc_thresh = 1.5, expr_thresh = .0001, MIN_OBS = 3)
## get_de_genes: CD4_T_cells found DE genes: 0
## get_de_genes: CD8_T_cells found DE genes: 0
## get_de_genes: Monocytes_macrophages found DE genes: 1
## get_de_genes: Cancer_cells found DE genes: 9
## get_de_genes: total DE genes: 10
marker_data_de = get_marker_data(cell_type_info_restr[[2]], cell_type_info_restr[[1]], de_genes)
marker_data_de[marker_data_de$cell_type == 'Cancer_cells', ]
## cell_type max_epr log_fc
## CLC Cancer_cells 0.0004681097 2.391379
## CCL17 Cancer_cells 0.0062723508 1.785208
## TXN Cancer_cells 0.0033601797 1.717347
## BATF3 Cancer_cells 0.0003970375 1.651421
## DHRS2 Cancer_cells 0.0002454216 1.777520
## IL26 Cancer_cells 0.0001407837 1.728564
## TUBA3C Cancer_cells 0.0001663410 1.646592
## CCL22 Cancer_cells 0.0009392022 1.964587
## TUBB2B Cancer_cells 0.0001732457 1.629272
We can also observe the expression of known marker genes for cancer cells across the predicted cell types.
cancer_genes <- readRDS(file.path(datadir, 'Data/HRS_signature.rds'))
cell_type_info <- RCTD_pred@cell_type_info$info[[1]]
normalized_info <- apply(cell_type_info[intersect(rownames(cell_type_info), cancer_genes), ], MARGIN = 1, FUN = function(x) x/max(x))
heatmap(normalized_info, Colv = NA, Rowv = NA, scale="none")
We can now explore immune cell subtypes by running full mode on only pixels of a single cell type. We will focus on the three main cell types: CD4 T cells, CD8 T cells, and monocytes/macrophages.
RCTD_list <- readRDS(file.path(datadir, 'Objects/vignette_v1/cancer_RCTD_final.rds'))
RCTD_pred <- RCTD_list[[length(RCTD_list)]]
CD4_RCTD <- initialize.subtypes(RCTD_pred, 'CD4_T_cells', resolution = 1)
CD4_RCTD <- run.subtypes(CD4_RCTD, max_iter = 200, convergence_thresh = 1e-5)
saveRDS(CD4_RCTD, file.path(datadir, 'Objects/vignette_v1/CD4_RCTD.rds'))
CD8_RCTD <- initialize.subtypes(RCTD_pred, 'CD8_T_cells', resolution = 1)
CD8_RCTD <- run.subtypes(CD8_RCTD, max_iter = 200, convergence_thresh = 1e-5)
saveRDS(CD8_RCTD, file.path(datadir, 'Objects/vignette_v1/CD8_RCTD.rds'))
We can spatially plot cell subtypes.
predicted_plot <- function(x) {
plot_puck_continuous(RCTD_pred@originalSpatialRNA, colnames(RCTD_pred@originalSpatialRNA@counts), RCTD_pred@results$weights[, x], size = 0.1, my_pal = pals::brewer.blues(20)[2:20], title = sprintf('Cluster %s', x))
}
CD4_RCTD <- readRDS(file.path(datadir, 'Objects/vignette_v1/CD4_RCTD.rds'))
RCTD_pred <- CD4_RCTD[[length(CD4_RCTD)]]
plots = list()
for (i in 0:5) {
plots[[i+1]] <- predicted_plot(as.character(i))
}
ggarrange(plotlist = plots)
CD8_RCTD <- readRDS(file.path(datadir, 'Objects/vignette_v1/CD8_RCTD.rds'))
RCTD_pred <- CD8_RCTD[[length(CD8_RCTD)]]
plots = list()
for (i in 0:4) {
plots[[i+1]] <- predicted_plot(as.character(i))
}
ggarrange(plotlist = plots)